knitr::opts_chunk$set(comment = NA)

In this file, we will do an exploratory analysis of the final schemes datasheet which we obtained in the previous code.

Load the dataset and libraries.

library(readxl)
haq_final <- read_excel("C:/Users/Admin/Downloads/CivicDataLab/tasks/haq_final.xlsx")
View(haq_final)

library(dplyr)
Warning: package 'dplyr' was built under R version 4.1.3

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(DT)
library(writexl)
Warning: package 'writexl' was built under R version 4.1.3

Check basic structure of the dataset.

dim(haq_final)
[1] 88343    15
str(haq_final)
tibble [88,343 x 15] (S3: tbl_df/tbl/data.frame)
 $ scheme_code                         : num [1:88343] 2.07e+12 2.07e+12 2.07e+12 2.07e+12 2.07e+12 ...
 $ scheme_name                         : chr [1:88343] "Pension payment to employees of Sainik School Lucknow" "Pension payment to employees of Sainik School Lucknow" "Pension payment to employees of Sainik School Lucknow" "Pension payment to employees of Sainik School Lucknow" ...
 $ Classification of the type of Scheme: chr [1:88343] "Benefits both boy and girl students directly" "Benefits both boy and girl students directly" "Benefits both boy and girl students directly" "Benefits both boy and girl students directly" ...
 $ Mode of Benefit Transfer            : chr [1:88343] "Payments & Awards" "Payments & Awards" "Payments & Awards" "Payments & Awards" ...
 $ scheme_allotment                    : num [1:88343] 23300000 20000000 20000000 24576000 8192000 ...
 $ scheme_expenditure                  : num [1:88343] 23300000 19911624 19911624 20662553 3004306 ...
 $ scheme_utilisation                  : chr [1:88343] "100.0" "99.56" "99.56" "84.08" ...
 $ district_name                       : chr [1:88343] "LUCKNOW COLL." "LUCKNOW COLL." "LUCKNOW COLL." "LUCKNOW COLL." ...
 $ ddo_name                            : chr [1:88343] "<U+091C><U+0928><U+0930><U+0932> <U+0938><U+0947><U+0915><U+094D><U+0930><U+0947><U+091F><U+0930><U+0940> - <U+"| __truncated__ "<U+091C><U+0928><U+0930><U+0932> <U+0938><U+0947><U+0915><U+094D><U+0930><U+0947><U+091F><U+0930><U+0940> - <U+"| __truncated__ "<U+091C><U+0928><U+0930><U+0932> <U+0938><U+0947><U+0915><U+094D><U+0930><U+0947><U+091F><U+0930><U+0940> - <U+"| __truncated__ "<U+091C><U+0928><U+0930><U+0932> <U+0938><U+0947><U+0915><U+094D><U+0930><U+0947><U+091F><U+0930><U+0940> - <U+"| __truncated__ ...
 $ grant_number                        : num [1:88343] 72 72 72 72 72 72 72 47 72 72 ...
 $ fiscal_year                         : chr [1:88343] "2017-2018" "2019-2020" "2019-2020" "2021-2022" ...
 $ district_number                     : num [1:88343] 43 43 43 43 43 11 88 83 68 6 ...
 $ ddo_number                          : num [1:88343] 4508 4508 4508 4508 4508 ...
 $ grant_name                          : chr [1:88343] "EDUCATION (SECONDARY)" "EDUCATION (SECONDARY)" "EDUCATION (SECONDARY)" "EDUCATION (SECONDARY)" ...
 $ division_name                       : chr [1:88343] "LUCKNOW COLL." "LUCKNOW COLL." "LUCKNOW COLL." "LUCKNOW COLL." ...

Start with the Exploratory Data Analysis (EDA).

glimpse(haq_final)
Rows: 88,343
Columns: 15
$ scheme_code                            <dbl> 2.071011e+12, 2.071011e+12, 2.0~
$ scheme_name                            <chr> "Pension payment to employees o~
$ `Classification of the type of Scheme` <chr> "Benefits both boy and girl stu~
$ `Mode of Benefit Transfer`             <chr> "Payments & Awards", "Payments ~
$ scheme_allotment                       <dbl> 23300000, 20000000, 20000000, 2~
$ scheme_expenditure                     <dbl> 23300000, 19911624, 19911624, 2~
$ scheme_utilisation                     <chr> "100.0", "99.56", "99.56", "84.~
$ district_name                          <chr> "LUCKNOW COLL.", "LUCKNOW COLL.~
$ ddo_name                               <chr> "<U+091C><U+0928><U+0930><U+0932> <U+0938><U+0947><U+0915><U+094D><U+0930><U+0947><U+091F><U+0930><U+0940> - <U+0938><U+0948><U+0928><U+093F><U+0915> <U+0938><U+094D><U+0915><U+0942><U+0932> <U+0938><U+094B><U+0938><U+093E><U+0907><U+091F><U+0940>~
$ grant_number                           <dbl> 72, 72, 72, 72, 72, 72, 72, 47,~
$ fiscal_year                            <chr> "2017-2018", "2019-2020", "2019~
$ district_number                        <dbl> 43, 43, 43, 43, 43, 11, 88, 83,~
$ ddo_number                             <dbl> 4508, 4508, 4508, 4508, 4508, 4~
$ grant_name                             <chr> "EDUCATION (SECONDARY)", "EDUCA~
$ division_name                          <chr> "LUCKNOW COLL.", "LUCKNOW COLL.~
unique(haq_final$scheme_code) # use unique() to get the levels of categorical variables
  [1] 2.071011e+12 2.071011e+12 2.071011e+12 2.071011e+12 2.071011e+12
  [6] 2.202010e+12 2.202010e+12 2.202011e+12 2.202011e+12 2.202011e+12
 [11] 2.202011e+12 2.202011e+12 2.202011e+12 2.202011e+12 2.202011e+12
 [16] 2.202011e+12 2.202011e+12 2.202011e+12 2.202011e+12 2.202011e+12
 [21] 2.202011e+12 2.202011e+12 2.202011e+12 2.202011e+12 2.202011e+12
 [26] 2.202011e+12 2.202011e+12 2.202011e+12 2.202018e+12 2.202018e+12
 [31] 2.202018e+12 2.202018e+12 2.202018e+12 2.202018e+12 2.202018e+12
 [36] 2.202018e+12 2.202018e+12 2.202018e+12 2.202018e+12 2.202018e+12
 [41] 2.202018e+12 2.202020e+12 2.202020e+12 2.202020e+12 2.202021e+12
 [46] 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12
 [51] 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12
 [56] 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12
 [61] 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12
 [66] 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12
 [71] 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12 2.202021e+12
 [76] 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12
 [81] 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12
 [86] 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12
 [91] 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12 2.202028e+12
 [96] 2.202028e+12 2.202028e+12 2.202028e+12 2.202050e+12 2.202051e+12
[101] 2.202051e+12 2.202051e+12 2.202051e+12 2.202051e+12 2.202051e+12
[106] 2.202051e+12 2.202800e+12 2.202800e+12 2.202800e+12 2.202800e+12
[111] 2.202800e+12 2.202800e+12 2.202800e+12 2.202800e+12 2.202800e+12
[116] 2.202800e+12 2.202800e+12 2.202800e+12 2.202800e+12 2.202800e+12
[121] 2.202800e+12 2.202800e+12 2.202808e+12 2.202808e+12 2.202808e+12
[126] 2.202808e+12 2.204000e+12 2.204001e+12 2.204001e+12 2.204001e+12
[131] 2.204001e+12 2.204001e+12 2.204001e+12 2.204001e+12 2.204001e+12
[136] 2.204001e+12 2.204001e+12 2.204001e+12 2.204001e+12 2.204001e+12
[141] 2.204001e+12 2.204001e+12 2.204001e+12 2.204001e+12 2.204001e+12
[146] 2.204001e+12 2.204001e+12 2.204001e+12 2.205001e+12 2.205001e+12
[151] 2.205001e+12 2.205001e+12 2.205001e+12 2.205001e+12 4.202012e+12
[156] 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12
[161] 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12
[166] 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12
[171] 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12
[176] 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12 4.202012e+12
[181] 4.202012e+12 4.202012e+12 4.202012e+12 4.202018e+12 4.202018e+12
[186] 4.202018e+12 4.202018e+12 4.202018e+12 4.202021e+12 4.202021e+12
[191] 4.202021e+12 4.202021e+12 4.202021e+12 4.202021e+12 4.202021e+12
[196] 4.202021e+12 4.202021e+12 4.202028e+12 4.202028e+12 4.202028e+12
[201] 4.202028e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[206] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[211] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[216] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[221] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[226] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[231] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[236] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[241] 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12 4.202038e+12
[246] 4.202041e+12 4.202041e+12 4.202041e+12 4.202041e+12 4.202041e+12
[251] 4.202041e+12 4.202041e+12 4.202041e+12 4.202048e+12 4.202048e+12
[256] 4.202048e+12 4.202048e+12 4.202048e+12 4.202048e+12 4.202048e+12
[261] 4.202048e+12 4.202048e+12 4.202048e+12 4.202048e+12 4.202048e+12
[266] 4.202048e+12 4.202048e+12 4.202048e+12 4.202048e+12 4.202048e+12
[271] 4.202048e+12 4.202048e+12 4.202048e+12 4.202048e+12
unique(haq_final$Scheme_Name)
Warning: Unknown or uninitialised column: `Scheme_Name`.
NULL
unique(haq_final$`Classification of the type of Scheme`)
[1] "Benefits both boy and girl students directly"  
[2] "Benefits boy students exclusively"             
[3] "Benefits girl students exclusively"            
[4] "Benefits both boy and girl students indirectly"
unique(haq_final$`Mode of Benefit Transfer`)
[1] "Payments & Awards"                "Institutional Grants"            
[3] "In-kind service delivery"         "Direct Cash Transfer to students"
[5] "Others"                           "Infrastructure"                  
unique(haq_final$district_name)
 [1] "LUCKNOW COLL."    "BAREILLY"         "KANSHI RAM NAGAR" "BAGHPAT SADAR"   
 [5] "FIROZABAD"        "ALIGARH"          "BIJNOR"           "MORADABAD"       
 [9] "BHADOHI"          "KANPUR NAGAR"     "KHERI"            "HATHRAS"         
[13] "GORAKHPUR"        "FATEHPUR"         "ETAWAH"           "BANDA"           
[17] "J.P.NAGAR"        "SHAHJAHANPUR"     "DEORIA"           "SADAR TRY-HMRPR" 
[21] "GHAZIABAD"        "SRAWASTI"         "FAIZABAD"         "LALITPUR"        
[25] "VARANASI"         "ALLAHABAD"        "KAUSHAMBI"        "KANNAUJ"         
[29] "MATHURA"          "HARDOI"           "AMBEDKARNAGAR"    "SANT KABIR NAGR" 
[33] "SIDDHARTH NAGAR"  "SULTANPUR"        "MIRZAPUR"         "BADAUN"          
[37] "BULANDSHAHR"      "MAINPURI"         "MAHOBA"           "CHITRAKOOT"      
[41] "JHANSI-MAIN"      "ETAH"             "MUZAFFARNAGAR"    "SITAPUR"         
[45] "BALLIA"           "KANPUR DEHAT"     "AZAMGARH"         "FARRUKHABAD"     
[49] "JAUNPUR"          "MAU"              "SAMBHAL"          "AURAIYA"         
[53] "HAPUR"            "AYODHYA"          "G.B.NAGAR"        "PRATAPGARH"      
[57] "MAHARAJGANJ"      "RAEBARELI"        "TRY.SONBHADRA"    "ALLAHABAD II"    
[61] "BAHRAICH"         "GHAZIPUR"         "SHAMLI"           "BASTI"           
[65] "KUSHI NAGAR"      "PILIBHIT"         "RAMPUR"           "CHANDAULI"       
[69] "MEERUT SADAR"     "GONDA"            "BALRAMPUR"        "SAHARANPUR"      
[73] "CSMAHARAJ NAGAR"  "UNNAO TREASURY"   "BARABANKI"        "ORAI"            
[77] "PRAYAGRAJ-COLL"   "PRAYAGRAJ-CIV"    "LUCKNOW-JB"      
unique(haq_final$ddo_name)
 [1] "<U+091C><U+0928><U+0930><U+0932> <U+0938><U+0947><U+0915><U+094D><U+0930><U+0947><U+091F><U+0930><U+0940> - <U+0938><U+0948><U+0928><U+093F><U+0915> <U+0938><U+094D><U+0915><U+0942><U+0932> <U+0938><U+094B><U+0938><U+093E><U+0907><U+091F><U+0940>"
 [2] "<U+091C><U+093F><U+0932><U+093E> <U+0935><U+093F><U+0926><U+094D><U+092F><U+093E><U+0932><U+092F> <U+0928><U+093F><U+0930><U+0940><U+0915><U+094D><U+0937><U+0915> - <U+0905><U+0936><U+093E><U+0938><U+0915><U+0940><U+092F> <U+092E><U+093E>0 <U+0935><U+093F>0"
 [3] "<U+0938><U+0902><U+092F><U+0941><U+0915><U+094D><U+0924> <U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> <U+092A><U+094D><U+0930><U+093E><U+0935><U+093F><U+0927><U+093F><U+0915> <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E>"
 [4] "<U+0935><U+093F><U+0924><U+094D><U+0924> <U+090F><U+0935><U+0902> <U+0932><U+0947><U+0916><U+093E><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
 [5] "-"                                           
 [6] "<U+0938><U+0902><U+092F><U+0941><U+0915><U+094D><U+0924> <U+0928><U+093F><U+0926><U+0947><U+0936><U+0915>- <U+092E><U+0941><U+0916><U+094D><U+092F><U+093E><U+0932><U+092F>"
 [7] "<U+0935><U+093F><U+0924><U+094D><U+0924> <U+0928><U+093F><U+092F><U+0902><U+0924><U+094D><U+0930><U+0915> - <U+092C><U+0947><U+0938><U+093F><U+0915> <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E> <U+092A><U+0930><U+093F><U+0937><U+0926>"
 [8] "<U+091C><U+093F><U+0932><U+093E><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
 [9] "<U+092A><U+094D><U+0930><U+093E><U+091A><U+093E><U+0930><U+094D><U+092F> -<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915>"
[10] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> -<U+092C><U+0947>0 <U+0936><U+093F>0 -<U+0915><U+0948><U+092E><U+094D><U+092A> <U+0915><U+093E>0"
[11] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> - <U+092C><U+0947>0 <U+0936><U+093F>0 <U+0928><U+093F><U+0926><U+0947>0"
[12] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> - <U+092E><U+093E><U+0927><U+094D><U+092F>0 <U+0936><U+093F>0 <U+0928><U+093F><U+0926><U+0947>0"
[13] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> - <U+0905><U+0928><U+094C><U+092A><U+091A><U+093E><U+0930><U+093F><U+0915> <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E>"
[14] "<U+092A><U+094D><U+0930><U+093E><U+091A><U+093E><U+0930><U+094D><U+092F> - <U+091C><U+093F><U+0932><U+093E> <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E> <U+090F><U+0935><U+0902> <U+092A><U+094D><U+0930><U+0936><U+093F><U+0915><U+094D><U+0937><U+0923> <U+0938><U+0902><U+0938><U+094D><U+0925><U+093E><U+0928>"
[15] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> - <U+0930><U+093E><U+091C><U+094D><U+092F> <U+0936><U+0948>0 <U+0905><U+0928><U+0941>0 <U+090F><U+0935><U+0902> <U+092A><U+094D><U+0930>0 <U+092A><U+0930><U+093F>0 - <U+0939><U+093F><U+0902><U+0926><U+0940> <U+0938><U+0902><U+0938><U+094D><U+0925><U+093E><U+0928>"
[16] "<U+0935><U+093F><U+0924><U+094D><U+0924> <U+090F><U+0935><U+0902> <U+0932><U+0947><U+0916><U+093E><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940> - <U+092E><U+093E><U+0927><U+094D><U+092F><U+092E><U+093F><U+0915>"
[17] "<U+091C><U+093F><U+0932><U+093E> <U+0905><U+0932><U+094D><U+092A><U+0938><U+0902><U+0916><U+094D><U+092F><U+0915> <U+0915><U+0932><U+094D><U+092F><U+093E><U+0923> <U+0905><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
[18] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915>- <U+0905><U+0932><U+094D><U+092A><U+0938><U+0902><U+0916><U+094D><U+092F><U+0915> <U+0915><U+0932><U+094D><U+092F><U+093E><U+0923> <U+0928><U+093F><U+0926><U+0947><U+0936><U+093E><U+0932><U+092F>"
[19] "<U+0930><U+091C><U+093F><U+0938><U+094D><U+091F><U+094D><U+0930><U+093E><U+0930> <U+0909>0 <U+092A><U+094D><U+0930>0 <U+092E><U+0926><U+0930><U+0938><U+093E> <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E> <U+092A><U+0930><U+093F><U+0937><U+0926>"
[20] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915>- <U+092E><U+093E><U+0927><U+094D><U+092F>0 <U+0936><U+093F>0- <U+0915><U+0948><U+092E><U+094D><U+092A> <U+0915><U+093E>0"
[21] "<U+091C><U+093F><U+0932><U+093E> <U+0935><U+093F><U+0926><U+094D><U+092F><U+093E><U+0932><U+092F> <U+0928><U+093F><U+0930><U+0940><U+0915><U+094D><U+0937><U+0915> - <U+0935><U+093F><U+0924><U+094D><U+0924> <U+090F><U+0935><U+0902> <U+0932><U+0947><U+0916><U+093E><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
[22] "<U+0905><U+092A><U+0930> <U+0938><U+091A><U+093F><U+0935> -<U+0915><U+094D><U+0937><U+0947><U+0924><U+094D><U+0930><U+0940><U+092F> <U+0915><U+093E>0"
[23] "<U+0938><U+091A><U+093F><U+0935> - <U+092E><U+093E><U+0927><U+094D><U+092F>0 <U+0936><U+093F>0 <U+092A><U+0930><U+093F>0"
[24] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> - <U+0930><U+093E><U+091C><U+094D><U+092F> <U+0935><U+093F><U+091C><U+094D><U+091E><U+093E><U+0928> <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E> <U+0938><U+0902><U+0938><U+094D><U+0925><U+093E><U+0928>"
[25] "<U+0905><U+0928><U+0941> <U+0938><U+091A><U+093F><U+0935> - <U+092D><U+093E><U+0937><U+093E> <U+0935><U+093F><U+092D><U+093E><U+0917>"
[26] "<U+092A><U+094D><U+0930><U+093E><U+091A><U+093E><U+0930><U+094D><U+092F> - <U+091C><U+093F><U+0932><U+093E> <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E> <U+090F><U+0935><U+0902> <U+092A><U+094D><U+0930><U+0936><U+093F> <U+0938><U+0902>. - <U+0915><U+093E><U+0928><U+092A><U+0941><U+0930> <U+0926><U+0947>0"
[27] "<U+0915><U+094D><U+0930><U+0940><U+0921><U+093E><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
[28] "<U+0915><U+094D><U+0937><U+0947><U+0924><U+094D><U+0930><U+0940><U+092F> <U+0915><U+094D><U+0930><U+0940><U+0921><U+093E><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
[29] "<U+0909><U+092A> <U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> - <U+0916><U+0947><U+0932>"
[30] "<U+0915><U+092E><U+093E><U+0928> <U+0905><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
[31] "<U+092A><U+094D><U+0930><U+0936><U+093E><U+0938><U+0928><U+093F><U+0915> <U+0905><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940> - <U+090F><U+0928>0 <U+0938><U+0940>0 <U+0938><U+0940>0 <U+0917><U+094D><U+0930><U+0941><U+092A> <U+092E><U+0941>0"
[32] "<U+0938><U+0902><U+092F><U+0941><U+0915><U+094D><U+0924> <U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> -<U+0935><U+093F><U+0924><U+094D><U+0924> <U+090F><U+0935><U+0902> <U+0932><U+0947><U+0916><U+093E>- <U+090F><U+0928>0<U+0938><U+0940>0<U+0938><U+0940>0 <U+0928><U+093F><U+0926><U+0947><U+0936><U+093E><U+0932><U+092F>"
[33] "<U+091C><U+093F><U+0932><U+093E> <U+092F><U+0941><U+0935><U+093E> <U+0915><U+0932><U+094D><U+092F><U+093E><U+0923> <U+090F><U+0935><U+0902> <U+092A><U+0940>.<U+0935><U+0940>.<U+0921><U+0940>. <U+0905><U+0927><U+093F><U+0915><U+093E><U+0930><U+0940>"
[34] "<U+0905><U+0928><U+0941> <U+0938><U+091A><U+093F><U+0935> - <U+0936><U+093F><U+0915><U+094D><U+0937><U+093E> <U+0935><U+093F><U+092D><U+093E><U+0917>"
[35] "<U+092A><U+094D><U+0930><U+0927><U+093E><U+0928><U+093E><U+091A><U+093E><U+0930><U+094D><U+092F> <U+0930><U+093E><U+091C><U+0915><U+0940><U+092F> <U+092A><U+093E><U+0932><U+0940><U+091F><U+0947><U+0915><U+094D><U+0928><U+093F><U+0915>"
[36] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915>-<U+0905><U+092D><U+093F><U+0932><U+0947><U+0916><U+093E><U+0917><U+093E><U+0930>"
[37] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> -<U+092A><U+0941><U+0930><U+093E><U+0924><U+0924><U+094D><U+0935>"
[38] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915> - <U+0938><U+0902><U+0917><U+094D><U+0930><U+0939><U+093E><U+0932><U+092F>"
[39] "<U+0928><U+093F><U+0926><U+0947><U+0936><U+0915>- <U+0938><U+0902><U+0938><U+094D><U+0915><U+0943><U+0924><U+093F>"
unique(haq_final$grant_number)
 [1] 72 47 71 62 83 81 48 39 75 22 14 92
unique(haq_final$fiscal_year)
[1] "2017-2018" "2019-2020" "2021-2022" "2020-2021" "2016-2017" "2018-2019"
unique(haq_final$district_number)
 [1] 43 11 88 83 68  6 12 14 72 20 48 78 32 21 19 26 86 15 35 25 59 85 49 58 27
[26] 22 82 84  7 47 74 80 67 52 28 13  5  9 71 87 23 10  3 46 31 62 34 18 29 66
[51] 92 81 90 76 53 70 45 69 64 51 30 91 33 73 16 17 77  4 50 79  2 89 44 54 24
[76] 60
unique(haq_final$ddo_number)
 [1] 4508 4506 4110 4354 6001 4108 6040 6010 4371 4217 6009 4104 6008 4368 4369
[16] 4503 4372 6002 4607 4606 4480 4139 4137 4140 4502 4504 4501 4505 4609 4670
[31] 6016 4608 2442 2443 2445 4476 4477 4478 2289 4669 4102 4779 4773 4776 4780
unique(haq_final$grant_name)
 [1] "EDUCATION (SECONDARY)"      "TECHANICAL EDUCATION"      
 [3] "EDUCATION (PRIMARY)"        "FIN.(SUPER ANNUAN.PENSION)"
 [5] "SOCIAL WEL.(SPL.COMP.SC.)"  "SOCIAL WELFARE (ST)"       
 [7] "MUSLIM WAQF"                "LANGUAGE"                  
 [9] "EDUCATION (S.C.E.R.T.)"     "SPORTS"                    
[11] "AGR.& OTHER(PANCHATIRAJ)"   "CULTURAL AFFAIR"           
unique(haq_final$division_name)
 [1] "LUCKNOW COLL."  "BAREILLY"       "AGRA"           "MEERUT SADAR"  
 [5] "MORADABAD"      "MIRZAPUR"       "KANPUR NAGAR"   "GORAKHPUR"     
 [9] "ALLAHABAD"      "BANDA"          "GONDA"          "FAIZABAD"      
[13] "JHANSI-MAIN"    "VARANASI"       "AYODHYA"        "BASTI"         
[17] "SAHARANPUR"     "AZAMGARH"       "PRAYAGRAJ-COLL"

Explore the share of each of the classification of the type of scheme in the total expenditure.

total_exp <- sum(haq_final$scheme_expenditure)

for(i in unique(haq_final$`Classification of the type of Scheme`)){
  df <- paste("Sch",i,sep = "_")
  assign(df, haq_final[haq_final$`Classification of the type of Scheme`==i,])
}

exp_boys <- sum(`Sch_Benefits boy students exclusively`$scheme_expenditure)
exp_boys/total_exp*100
[1] 1.556087
exp_girls <- sum(`Sch_Benefits girl students exclusively`$scheme_expenditure)
exp_girls/total_exp*100
[1] 0.2657823
exp_both_directly <- sum(`Sch_Benefits both boy and girl students directly`$scheme_expenditure)
exp_both_directly/total_exp*100
[1] 97.26222
exp_both_indirectly <- sum(`Sch_Benefits both boy and girl students indirectly`$scheme_expenditure)
exp_both_indirectly/total_exp*100
[1] 0.915911
type_scheme <- c("Benefits boys only", "Benefits girls only", "Benefits both directly", "Benefits both indirectly")
share_scheme <- c(1.556087,0.2657823,97.26222,0.915911)
share_typesofschemes <- data.frame(type_scheme,share_scheme)
datatable(share_typesofschemes)
# produce a graph using ggplot for each of them.         
ggplot(data = share_typesofschemes, aes(x = type_scheme, y = share_scheme)) + geom_bar(aes(fill = share_scheme), stat = 'identity')

compare the expenditure of each district on different types of schemes.

onlygirls <- aggregate(x = `Sch_Benefits girl students exclusively`$scheme_expenditure,
                       by= list(`Sch_Benefits girl students exclusively`$district_number),
                       FUN=sum)
colnames(onlygirls)[which(names(onlygirls) == "Group.1")] <- "district_no"
colnames(onlygirls)[which(names(onlygirls) == "x")] <- "expenditure"
onlygirls <- onlygirls[order(onlygirls$expenditure),]
datatable(onlygirls)
onlyboys <- aggregate(x = `Sch_Benefits boy students exclusively`$scheme_expenditure,
                      by= list(`Sch_Benefits boy students exclusively`$district_number),
                      FUN=sum)
colnames(onlyboys)[which(names(onlyboys) == "Group.1")] <- "district_no"
colnames(onlyboys)[which(names(onlyboys) == "x")] <- "expenditure"
onlyboys <- onlyboys[order(onlyboys$expenditure),]
datatable(onlyboys)
bothdirectly <- aggregate(x = `Sch_Benefits both boy and girl students directly`$scheme_expenditure,
                          by= list(`Sch_Benefits both boy and girl students directly`$district_number),
                          FUN=sum)
colnames(bothdirectly)[which(names(bothdirectly) == "Group.1")] <- "district_no"
colnames(bothdirectly)[which(names(bothdirectly) == "x")] <- "expenditure"
bothdirectly <- bothdirectly[order(bothdirectly$expenditure),]
datatable(bothdirectly)
bothindirectly <- aggregate(x = `Sch_Benefits both boy and girl students indirectly`$scheme_expenditure,
                            by= list(`Sch_Benefits both boy and girl students indirectly`$district_number),
                            FUN=sum)
colnames(bothindirectly)[which(names(bothindirectly) == "Group.1")] <- "district_no"
colnames(bothindirectly)[which(names(bothindirectly) == "x")] <- "expenditure"
bothindirectly <- bothindirectly[order(bothindirectly$expenditure),]
datatable(bothindirectly)

#create a dataframe for schemes whose expenditure is zero.

exp_zero <- haq_final [(haq_final$scheme_expenditure == 0),]
datatable(exp_zero)
Warning in instance$preRenderHook(instance): It seems your data is too big
for client-side DataTables. You may consider server-side processing: https://
rstudio.github.io/DT/server.html
unique(exp_zero$scheme_utilisation) 
  [1] "0.0"    "-"      ".00"    "4.2"    "45.3"   "98.73"  "49.25"  "27.71" 
  [9] "79.62"  "100.0"  "40.4"   "1.18"   "97.01"  "10.76"  "29.46"  "17.85" 
 [17] "64.39"  "33.46"  "56.1"   "66.63"  "57.78"  "61.42"  "28.4"   "37.06" 
 [25] "56.86"  "12.25"  "20.21"  "65.12"  "64.83"  "49.97"  "24.99"  "49.84" 
 [33] "64.61"  "49.63"  "24.61"  "50.98"  "48.85"  "24.43"  "45.59"  "49.57" 
 [41] "49.31"  "49.58"  "100.00" "59.40"  "42.30"  "24.10"  "9.46"   "94.83" 
 [49] "92.84"  "21.91"  "40.77"  "31.47"  "37.64"  "20.01"  "91.20"  "68.56" 
 [57] "2.64"   "92.65"  "89.70"  "552.00" "97.22"  "66.52"  "4.31"   ".34"   
 [65] ".49"    "8.40"   "87.27"  "19.55"  "43.48"  "75.09"  "12.00"  "57.72" 
 [73] "23.53"  "67.53"  "22.99"  "10.34"  "35.88"  "38.38"  "11.14"  "27.27" 
 [81] "74.63"  "23.39"  "78.97"  "48.38"  "13.68"  "99.70"  "77.84"  "99.92" 
 [89] "99.80"  "99.94"  "73.51"  "60.0"   "61.78"  "62.34"  "76.42"  "30.11" 
 [97] "66.66"  "66.09"  "61.41"  "33.32"  "25.66"  "16.92"  "27.75"  "8.44"  
[105] "4.00"   "29.37"  "19.26"  "62.52"  "39.29"  "12.94"  "19.25"  "38.88" 
[113] "24.22"  "8.68"   "24.67"  "5.16"   "46.51"  "23.61"  "25.10"  "40.52" 
[121] "65.90"  "5.99"   "54.03"  "16.16"  "11.61"  "15.87"  "15.50"  "47.17" 
[129] "30.43"  "24.56"  "50.02"  "36.87"  "11.76"  "18.91"  "98.16"  "53.07" 
[137] "24.13"  "47.39"  "50.00"  "11.43"  "47.28"  "35.46"  "25.16"  "9.98"  
[145] "26.97"  "61.82"  "48.86"  "64.23"  "20.50"  "22.68"  "33.80"  "46.27" 
[153] "32.90"  "34.54"  "55.11"  "5.04"   "61.01"  "97.90"  "9.97"   ".18"   
[161] "27.12"  "46.60"  "47.51"  "17.03"  "67.84"  "8.16"   "7.72"   "71.86" 
[169] "98.52"  "26.99"  "81.91"  "10.92"  "46.01"  "96.05"  "17.13"  "70.72" 
[177] "1.62"   "41.87"  "30.24"  "11.07"  "85.56"  "14.24"  "99.97"  "33.90" 
[185] "34.03"  "37.75"  "94.65"  "43.05"  "51.40"  "50.11"  "36.13"  "22.14" 
[193] "57.88"  "80.38"  "16.69"  "8.32"   "2.85"   "47.96"  "36.67"  "2.38"  
[201] "28.50"  "18.81"  "25.00"  "26.06"  "60.78"  "17.32"  "18.06"  "25.98" 
[209] "26.25"  "21.86"  "20.15"  "26.05"  "21.29"  "67.86"  "21.65"  "20.54" 
[217] "41.89"  "36.19"  "50.03"  "75.73" 
# can scheme utilization be anything other than 0 if expenditure is 0 !?    

find out the grant type with highest amount of expenditure.

for(i in unique(haq_final$grant_number)){
  df1 <- paste("Grant",i,sep = "_")
  assign(df1, haq_final[haq_final$grant_number==i,])
}

exp_grant14 <- sum(Grant_14$scheme_expenditure)
share_grant14 = exp_grant14/total_exp*100

exp_grant92 <- sum(Grant_92$scheme_expenditure)
share_grant92 = exp_grant92/total_exp*100

exp_grant22 <- sum(Grant_22$scheme_expenditure)
share_grant22 = exp_grant22/total_exp*100

exp_grant75 <- sum(Grant_75$scheme_expenditure)
share_grant75 = exp_grant75/total_exp*100

exp_grant72 <- sum(Grant_72$scheme_expenditure)
share_grant72 = exp_grant72/total_exp*100

exp_grant71 <- sum(Grant_71$scheme_expenditure)
share_grant71 = exp_grant71/total_exp*100

exp_grant47 <- sum(Grant_47$scheme_expenditure)
share_grant47 = exp_grant47/total_exp*100

exp_grant62 <- sum(Grant_62$scheme_expenditure)
share_grant62 = exp_grant62/total_exp*100

exp_grant83 <- sum(Grant_83$scheme_expenditure)
share_grant83 = exp_grant83/total_exp*100

exp_grant81 <- sum(Grant_81$scheme_expenditure)
share_grant81 = exp_grant81/total_exp*100

exp_grant48 <- sum(Grant_48$scheme_expenditure)
share_grant48 = exp_grant48/total_exp*100

exp_grant39 <- sum(Grant_39$scheme_expenditure)
share_grant39 = exp_grant39/total_exp*100

share_grants <- c(share_grant14,share_grant22,share_grant39,share_grant47,share_grant48,share_grant62,share_grant71,share_grant72,share_grant75,share_grant81,share_grant83,share_grant92)
name_grants <- c("14", "22","39", "47", "48", "62","71","72", "75", "81", "83","92")
grant_comparison <- data.frame(name_grants,share_grants)
datatable(grant_comparison)
ggplot(data = grant_comparison, aes(x = name_grants, y = share_grants)) + geom_bar(aes(fill = share_grants), stat = 'identity')

unique(Grant_71$grant_name)
[1] "EDUCATION (PRIMARY)"
unique(Grant_72$grant_name)
[1] "EDUCATION (SECONDARY)"

identify the observations(/schemes) with the highest and lowest expenditure.

max(haq_final$scheme_expenditure)  
[1] 14787739297
highest_exp <- haq_final[(haq_final$scheme_expenditure == 14787739297), ]
datatable(highest_exp)
min(haq_final$scheme_expenditure)
[1] 0
lowest_exp <- haq_final[(haq_final$scheme_expenditure == 0), ]
datatable(lowest_exp)
Warning in instance$preRenderHook(instance): It seems your data is too big
for client-side DataTables. You may consider server-side processing: https://
rstudio.github.io/DT/server.html
unique(lowest_exp$grant_number)
 [1] 72 71 62 81 48 83 39 75 22 14 47
culturalaffair <- haq_final[(haq_final$grant_name =="CULTURAL AFFAIR"),]
datatable(culturalaffair)
unique(culturalaffair$grant_number) 
[1] 92

#So, 92(Cultural affairs)doesn’t have any 0 expenditure scheme #none of them directly benefits boys and girls, all of them are in Lucknow, only one in Gorakhpur #maximum utilization - 100% #this is the only grant which doesn’t have zero scheme expenditure #mode of benefit is infrastructure

identify the observations(/schemes) with the highest and lowest allotment.

max(haq_final$scheme_allotment)       
[1] 14787739297
highest_allot <- haq_final[(haq_final$scheme_allotment == 14787739297), ]
datatable(highest_allot)
min(haq_final$scheme_allotment)
[1] -452100000
lowest_allot <- haq_final[(haq_final$scheme_allotment < 0), ]
datatable(lowest_allot)
#negative allotment amount!?

Explore the share of each of the mode of benefit of transfer in the total expenditure.

total_exp <- sum(haq_final$scheme_expenditure)

for(i in unique(haq_final$`Mode of Benefit Transfer`)){
  df <- paste(i)
  assign(df, haq_final[haq_final$`Mode of Benefit Transfer`==i,])
}

cash_transfer_exp <- sum(`Direct Cash Transfer to students`$scheme_expenditure)
cash_transfer = cash_transfer_exp/total_exp*100
In_Kind_exp <- sum(`In-kind service delivery`$scheme_expenditure)
In_Kind = In_Kind_exp/total_exp*100
Infra_exp <- sum(Infrastructure$scheme_expenditure)
Infra = Infra_exp/total_exp*100
Inst_Grants_exp <- sum(`Institutional Grants`$scheme_expenditure)
Inst_Grants = Inst_Grants_exp/total_exp*100
PA_exp <- sum(`Payments & Awards`$scheme_expenditure)
Payments_Awards = PA_exp/total_exp*100
others_exp <- sum(Others$scheme_expenditure)
Oth = others_exp/total_exp*100

type_mode <- c("Cash Grants","In-kind Delivery","Infrastructure","Institutional Grants","Payments and Awards","Others")
share_mode <- c(cash_transfer,In_Kind,Infra,Inst_Grants,Payments_Awards,Oth)
share_modesoftransfer <- data.frame(type_mode,share_mode)
datatable(share_modesoftransfer)
ggplot(data = share_modesoftransfer, aes(x = type_mode, y = share_mode)) + geom_bar(aes(fill = share_mode), stat = 'identity')

compare the expenditure of each district on modes of benefit of transfer.

direct_cash <- aggregate(x = `Direct Cash Transfer to students`$scheme_expenditure,
                       by= list(`Direct Cash Transfer to students`$district_number),
                       FUN=sum)
colnames(direct_cash)[which(names(direct_cash) == "Group.1")] <- "district_no"
colnames(direct_cash)[which(names(direct_cash) == "x")] <- "expenditure"
direct_cash <- direct_cash[order(direct_cash$expenditure),]
datatable(direct_cash)
in_kind <- aggregate(x = `In-kind service delivery`$scheme_expenditure,
                         by= list(`In-kind service delivery`$district_number),
                         FUN=sum)
colnames(in_kind)[which(names(in_kind) == "Group.1")] <- "district_no"
colnames(in_kind)[which(names(in_kind) == "x")] <- "expenditure"
in_kind <- in_kind[order(in_kind$expenditure),]
datatable(in_kind)
Infra <- aggregate(x = Infrastructure$scheme_expenditure,
                     by= list(Infrastructure$district_number),
                     FUN=sum)
colnames(Infra)[which(names(Infra) == "Group.1")] <- "district_no"
colnames(Infra)[which(names(Infra) == "x")] <- "expenditure"
Infra <- Infra[order(Infra$expenditure),]
datatable(Infra)
Inst.Grants <- aggregate(x = `Institutional Grants`$scheme_expenditure,
                     by= list(`Institutional Grants`$district_number),
                     FUN=sum)
colnames(Inst.Grants)[which(names(Inst.Grants) == "Group.1")] <- "district_no"
colnames(Inst.Grants)[which(names(Inst.Grants) == "x")] <- "expenditure"
Inst.Grants <- Inst.Grants[order(Inst.Grants$expenditure),]
datatable(Inst.Grants)
OtherModes <- aggregate(x = Others$scheme_expenditure,
                     by= list(Others$district_number),
                     FUN=sum)
colnames(OtherModes)[which(names(OtherModes) == "Group.1")] <- "district_no"
colnames(OtherModes)[which(names(OtherModes) == "x")] <- "expenditure"
OtherModes <- OtherModes[order(OtherModes$expenditure),]
datatable(OtherModes)
Payments_Awards <- aggregate(x = `In-kind service delivery`$scheme_expenditure,
                     by= list(`In-kind service delivery`$district_number),
                     FUN=sum)
colnames(Payments_Awards)[which(names(Payments_Awards) == "Group.1")] <- "district_no"
colnames(Payments_Awards)[which(names(Payments_Awards) == "x")] <- "expenditure"
Payments_Awards <- Payments_Awards[order(Payments_Awards$expenditure),]
datatable(Payments_Awards)